1 package org.apache.lucene.codecs.memory;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.Iterator;
24 import java.util.Map;
25 import java.util.TreeMap;
26
27 import org.apache.lucene.codecs.FieldsConsumer;
28 import org.apache.lucene.codecs.FieldsProducer;
29 import org.apache.lucene.codecs.PostingsFormat;
30 import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
31 import org.apache.lucene.index.DocsAndPositionsEnum;
32 import org.apache.lucene.index.FieldInfo;
33 import org.apache.lucene.index.Fields;
34 import org.apache.lucene.index.IndexOptions;
35 import org.apache.lucene.index.OrdTermState;
36 import org.apache.lucene.index.PostingsEnum;
37 import org.apache.lucene.index.SegmentReadState;
38 import org.apache.lucene.index.SegmentWriteState;
39 import org.apache.lucene.index.TermState;
40 import org.apache.lucene.index.Terms;
41 import org.apache.lucene.index.TermsEnum;
42 import org.apache.lucene.store.IOContext;
43 import org.apache.lucene.store.RAMOutputStream;
44 import org.apache.lucene.util.Accountable;
45 import org.apache.lucene.util.Accountables;
46 import org.apache.lucene.util.ArrayUtil;
47 import org.apache.lucene.util.BytesRef;
48 import org.apache.lucene.util.RamUsageEstimator;
49 import org.apache.lucene.util.automaton.CompiledAutomaton;
50 import org.apache.lucene.util.automaton.RunAutomaton;
51 import org.apache.lucene.util.automaton.Transition;
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 public final class DirectPostingsFormat extends PostingsFormat {
77
78 private final int minSkipCount;
79 private final int lowFreqCutoff;
80
81 private final static int DEFAULT_MIN_SKIP_COUNT = 8;
82 private final static int DEFAULT_LOW_FREQ_CUTOFF = 32;
83
84
85
86
87
  /** Creates {@code DirectPostingsFormat} with the default skip count (8) and low-freq cutoff (32). */
  public DirectPostingsFormat() {
    this(DEFAULT_MIN_SKIP_COUNT, DEFAULT_LOW_FREQ_CUTOFF);
  }
91
92
93
94
95
96
  /**
   * @param minSkipCount how many consecutive terms must share a byte prefix before a skip
   *        pointer is recorded for that prefix (see {@code DirectField.setSkips})
   * @param lowFreqCutoff terms with {@code docFreq <= lowFreqCutoff} are stored in the
   *        compact flattened (low-freq) representation; others use parallel arrays
   */
  public DirectPostingsFormat(int minSkipCount, int lowFreqCutoff) {
    super("Direct");
    this.minSkipCount = minSkipCount;
    this.lowFreqCutoff = lowFreqCutoff;
  }
102
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // Writing is delegated entirely to the default Lucene50 format; only reading
    // is specialized (postings are loaded into RAM when the segment is opened).
    return PostingsFormat.forName("Lucene50").fieldsConsumer(state);
  }
107
108 @Override
109 public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
110 FieldsProducer postings = PostingsFormat.forName("Lucene50").fieldsProducer(state);
111 if (state.context.context != IOContext.Context.MERGE) {
112 FieldsProducer loadedPostings;
113 try {
114 postings.checkIntegrity();
115 loadedPostings = new DirectFields(state, postings, minSkipCount, lowFreqCutoff);
116 } finally {
117 postings.close();
118 }
119 return loadedPostings;
120 } else {
121
122 return postings;
123 }
124 }
125
  /** RAM-resident {@link FieldsProducer}: maps each field name to its fully loaded terms. */
  private static final class DirectFields extends FieldsProducer {
    // TreeMap so iterator() yields field names in sorted order.
    private final Map<String,DirectField> fields = new TreeMap<>();

    public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff) throws IOException {
      // Eagerly load every field's terms + postings into the heap.
      for (String field : fields) {
        this.fields.put(field, new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff));
      }
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.unmodifiableSet(fields.keySet()).iterator();
    }

    @Override
    public Terms terms(String field) {
      // Null when the field was not indexed in this segment.
      return fields.get(field);
    }

    @Override
    public int size() {
      return fields.size();
    }

    @Override
    public void close() {
      // Nothing to release: all data lives on the heap.
    }

    @Override
    public long ramBytesUsed() {
      // Field-name chars plus each loaded field's own accounting.
      long sizeInBytes = 0;
      for(Map.Entry<String,DirectField> entry: fields.entrySet()) {
        sizeInBytes += entry.getKey().length() * RamUsageEstimator.NUM_BYTES_CHAR;
        sizeInBytes += entry.getValue().ramBytesUsed();
      }
      return sizeInBytes;
    }

    @Override
    public Collection<Accountable> getChildResources() {
      return Accountables.namedAccountables("field", fields);
    }

    @Override
    public void checkIntegrity() throws IOException {
      // Integrity was verified against the delegate producer before loading
      // (see fieldsProducer); the in-RAM copy has nothing further to check.
    }

    @Override
    public String toString() {
      return getClass().getSimpleName() + "(fields=" + fields.size() + ")";
    }
  }
180
181 private final static class DirectField extends Terms implements Accountable {
182
183 private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DirectField.class);
184
    /** Base class for one term's in-memory postings plus its optional prefix-skip pointers. */
    private static abstract class TermAndSkip implements Accountable {
      // Ordinals of later terms sharing a prefix with this term; filled during
      // load (saveSkip), then flattened into DirectField.skips and nulled out.
      public int[] skips;
    }
188
189 private static final class LowFreqTerm extends TermAndSkip {
190
191 private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(HighFreqTerm.class);
192
193 public final int[] postings;
194 public final byte[] payloads;
195 public final int docFreq;
196 public final int totalTermFreq;
197
198 public LowFreqTerm(int[] postings, byte[] payloads, int docFreq, int totalTermFreq) {
199 this.postings = postings;
200 this.payloads = payloads;
201 this.docFreq = docFreq;
202 this.totalTermFreq = totalTermFreq;
203 }
204
205 @Override
206 public long ramBytesUsed() {
207 return BASE_RAM_BYTES_USED +
208 ((postings!=null) ? RamUsageEstimator.sizeOf(postings) : 0) +
209 ((payloads!=null) ? RamUsageEstimator.sizeOf(payloads) : 0);
210 }
211
212 @Override
213 public Collection<Accountable> getChildResources() {
214 return Collections.emptyList();
215 }
216
217 }
218
219
220 private static final class HighFreqTerm extends TermAndSkip {
221
222 private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(HighFreqTerm.class);
223
224 public final long totalTermFreq;
225 public final int[] docIDs;
226 public final int[] freqs;
227 public final int[][] positions;
228 public final byte[][][] payloads;
229
230 public HighFreqTerm(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads, long totalTermFreq) {
231 this.docIDs = docIDs;
232 this.freqs = freqs;
233 this.positions = positions;
234 this.payloads = payloads;
235 this.totalTermFreq = totalTermFreq;
236 }
237
238 @Override
239 public long ramBytesUsed() {
240 long sizeInBytes = BASE_RAM_BYTES_USED;
241 sizeInBytes += (docIDs!=null)? RamUsageEstimator.sizeOf(docIDs) : 0;
242 sizeInBytes += (freqs!=null)? RamUsageEstimator.sizeOf(freqs) : 0;
243
244 if(positions != null) {
245 sizeInBytes += RamUsageEstimator.shallowSizeOf(positions);
246 for(int[] position : positions) {
247 sizeInBytes += (position!=null) ? RamUsageEstimator.sizeOf(position) : 0;
248 }
249 }
250
251 if (payloads != null) {
252 sizeInBytes += RamUsageEstimator.shallowSizeOf(payloads);
253 for(byte[][] payload : payloads) {
254 if(payload != null) {
255 sizeInBytes += RamUsageEstimator.shallowSizeOf(payload);
256 for(byte[] pload : payload) {
257 sizeInBytes += (pload!=null) ? RamUsageEstimator.sizeOf(pload) : 0;
258 }
259 }
260 }
261 }
262
263 return sizeInBytes;
264 }
265
266 @Override
267 public Collection<Accountable> getChildResources() {
268 return Collections.emptyList();
269 }
270
271 }
272
273 private final byte[] termBytes;
274 private final int[] termOffsets;
275
276 private final int[] skips;
277 private final int[] skipOffsets;
278
279 private final TermAndSkip[] terms;
280 private final boolean hasFreq;
281 private final boolean hasPos;
282 private final boolean hasOffsets;
283 private final boolean hasPayloads;
284 private final long sumTotalTermFreq;
285 private final int docCount;
286 private final long sumDocFreq;
287 private int skipCount;
288
289
290
291 private int count;
292 private int[] sameCounts = new int[10];
293 private final int minSkipCount;
294
295 private final static class IntArrayWriter {
296 private int[] ints = new int[10];
297 private int upto;
298
299 public void add(int value) {
300 if (ints.length == upto) {
301 ints = ArrayUtil.grow(ints);
302 }
303 ints[upto++] = value;
304 }
305
306 public int[] get() {
307 final int[] arr = new int[upto];
308 System.arraycopy(ints, 0, arr, 0, upto);
309 upto = 0;
310 return arr;
311 }
312 }
313
    /**
     * Loads all terms and postings for one field into RAM.
     *
     * Term bytes are concatenated into {@link #termBytes} with boundaries in
     * {@link #termOffsets}; each term's postings become either a {@link LowFreqTerm}
     * (docFreq at or below {@code lowFreqCutoff}) or a {@link HighFreqTerm}.
     * Prefix-skip pointers are accumulated per term via {@link #setSkips} and
     * finally flattened into {@link #skips}/{@link #skipOffsets}.
     *
     * @throws IllegalArgumentException if the delegate codec cannot report Terms.size()
     */
    public DirectField(SegmentReadState state, String field, Terms termsIn, int minSkipCount, int lowFreqCutoff) throws IOException {
      final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);

      sumTotalTermFreq = termsIn.getSumTotalTermFreq();
      sumDocFreq = termsIn.getSumDocFreq();
      docCount = termsIn.getDocCount();

      // We size the per-term arrays up front, so the delegate must know its term count.
      final int numTerms = (int) termsIn.size();
      if (numTerms == -1) {
        throw new IllegalArgumentException("codec does not provide Terms.size()");
      }
      terms = new TermAndSkip[numTerms];
      termOffsets = new int[1+numTerms];

      byte[] termBytes = new byte[1024];

      this.minSkipCount = minSkipCount;

      // Which postings features this field indexed, derived from its IndexOptions.
      hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS) > 0;
      hasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) > 0;
      hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) > 0;
      hasPayloads = fieldInfo.hasPayloads();

      BytesRef term;
      PostingsEnum postingsEnum = null;
      PostingsEnum docsAndPositionsEnum = null;
      final TermsEnum termsEnum = termsIn.iterator();
      int termOffset = 0;

      final IntArrayWriter scratch = new IntArrayWriter();

      // Accumulates the current low-freq term's payload bytes.
      final RAMOutputStream ros = new RAMOutputStream();

      while ((term = termsEnum.next()) != null) {
        final int docFreq = termsEnum.docFreq();
        final long totalTermFreq = termsEnum.totalTermFreq();

        // Append this term's bytes to the shared buffer, growing as needed.
        termOffsets[count] = termOffset;

        if (termBytes.length < (termOffset + term.length)) {
          termBytes = ArrayUtil.grow(termBytes, termOffset + term.length);
        }
        System.arraycopy(term.bytes, term.offset, termBytes, termOffset, term.length);
        termOffset += term.length;
        termOffsets[count+1] = termOffset;

        // Pull the richest enum the field supports; reuse across terms.
        if (hasPos) {
          docsAndPositionsEnum = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
        } else {
          postingsEnum = termsEnum.postings(postingsEnum);
        }

        final TermAndSkip ent;

        final PostingsEnum postingsEnum2;
        if (hasPos) {
          postingsEnum2 = docsAndPositionsEnum;
        } else {
          postingsEnum2 = postingsEnum;
        }

        int docID;

        if (docFreq <= lowFreqCutoff) {
          // Low-freq term: flatten docID/freq/pos/offsets/payloadLen into one int[].
          ros.reset();

          while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
            scratch.add(docID);
            if (hasFreq) {
              final int freq = postingsEnum2.freq();
              scratch.add(freq);
              if (hasPos) {
                for(int pos=0;pos<freq;pos++) {
                  scratch.add(docsAndPositionsEnum.nextPosition());
                  if (hasOffsets) {
                    scratch.add(docsAndPositionsEnum.startOffset());
                    scratch.add(docsAndPositionsEnum.endOffset());
                  }
                  if (hasPayloads) {
                    final BytesRef payload = docsAndPositionsEnum.getPayload();
                    if (payload != null) {
                      // Length goes in the int stream; bytes go in the side buffer.
                      scratch.add(payload.length);
                      ros.writeBytes(payload.bytes, payload.offset, payload.length);
                    } else {
                      scratch.add(0);
                    }
                  }
                }
              }
            }
          }

          final byte[] payloads;
          if (hasPayloads) {
            payloads = new byte[(int) ros.getFilePointer()];
            ros.writeTo(payloads, 0);
          } else {
            payloads = null;
          }

          final int[] postings = scratch.get();

          ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
        } else {
          // High-freq term: parallel per-doc arrays, exactly docFreq entries each.
          final int[] docs = new int[docFreq];
          final int[] freqs;
          final int[][] positions;
          final byte[][][] payloads;
          if (hasFreq) {
            freqs = new int[docFreq];
            if (hasPos) {
              positions = new int[docFreq][];
              if (hasPayloads) {
                payloads = new byte[docFreq][][];
              } else {
                payloads = null;
              }
            } else {
              positions = null;
              payloads = null;
            }
          } else {
            freqs = null;
            positions = null;
            payloads = null;
          }

          int upto = 0;
          while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
            docs[upto] = docID;
            if (hasFreq) {
              final int freq = postingsEnum2.freq();
              freqs[upto] = freq;
              if (hasPos) {
                // Each position occupies 1 int (pos) or 3 ints (pos, start, end).
                final int mult;
                if (hasOffsets) {
                  mult = 3;
                } else {
                  mult = 1;
                }
                if (hasPayloads) {
                  payloads[upto] = new byte[freq][];
                }
                positions[upto] = new int[mult*freq];
                int posUpto = 0;
                for(int pos=0;pos<freq;pos++) {
                  positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
                  if (hasPayloads) {
                    BytesRef payload = docsAndPositionsEnum.getPayload();
                    if (payload != null) {
                      // Deep-copy: the enum may reuse its payload buffer.
                      byte[] payloadBytes = new byte[payload.length];
                      System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
                      payloads[upto][pos] = payloadBytes;
                    }
                  }
                  posUpto++;
                  if (hasOffsets) {
                    positions[upto][posUpto++] = docsAndPositionsEnum.startOffset();
                    positions[upto][posUpto++] = docsAndPositionsEnum.endOffset();
                  }
                }
              }
            }

            upto++;
          }
          assert upto == docFreq;
          ent = new HighFreqTerm(docs, freqs, positions, payloads, totalTermFreq);
        }

        terms[count] = ent;
        // Update prefix-sharing counts / record skip pointers for this new term.
        setSkips(count, termBytes);
        count++;
      }

      // Final sentinel so termOffsets[count] bounds the last term.
      termOffsets[count] = termOffset;

      finishSkips();

      // Trim the shared term buffer to its exact final size.
      this.termBytes = new byte[termOffset];
      System.arraycopy(termBytes, 0, this.termBytes, 0, termOffset);

      // Flatten every term's skip list into one array, with per-term offsets.
      this.skips = new int[skipCount];
      this.skipOffsets = new int[1+numTerms];

      int skipOffset = 0;
      for(int i=0;i<numTerms;i++) {
        final int[] termSkips = terms[i].skips;
        skipOffsets[i] = skipOffset;
        if (termSkips != null) {
          System.arraycopy(termSkips, 0, skips, skipOffset, termSkips.length);
          skipOffset += termSkips.length;
          terms[i].skips = null;
        }
      }
      this.skipOffsets[numTerms] = skipOffset;
      assert skipOffset == skipCount;
    }
529
530 @Override
531 public long ramBytesUsed() {
532 long sizeInBytes = BASE_RAM_BYTES_USED;
533 sizeInBytes += ((termBytes!=null) ? RamUsageEstimator.sizeOf(termBytes) : 0);
534 sizeInBytes += ((termOffsets!=null) ? RamUsageEstimator.sizeOf(termOffsets) : 0);
535 sizeInBytes += ((skips!=null) ? RamUsageEstimator.sizeOf(skips) : 0);
536 sizeInBytes += ((skipOffsets!=null) ? RamUsageEstimator.sizeOf(skipOffsets) : 0);
537 sizeInBytes += ((sameCounts!=null) ? RamUsageEstimator.sizeOf(sameCounts) : 0);
538
539 if(terms!=null) {
540 sizeInBytes += RamUsageEstimator.shallowSizeOf(terms);
541 for(TermAndSkip termAndSkip : terms) {
542 sizeInBytes += (termAndSkip!=null) ? termAndSkip.ramBytesUsed() : 0;
543 }
544 }
545
546 return sizeInBytes;
547 }
548
    @Override
    public Collection<Accountable> getChildResources() {
      // Per-term sizes are folded into ramBytesUsed() directly.
      return Collections.emptyList();
    }

    @Override
    public String toString() {
      return "DirectTerms(terms=" + terms.length + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq + ",docs=" + docCount + ")";
    }
558
559
560 int compare(int ord, BytesRef other) {
561 final byte[] otherBytes = other.bytes;
562
563 int upto = termOffsets[ord];
564 final int termLen = termOffsets[1+ord] - upto;
565 int otherUpto = other.offset;
566
567 final int stop = upto + Math.min(termLen, other.length);
568 while (upto < stop) {
569 int diff = (termBytes[upto++] & 0xFF) - (otherBytes[otherUpto++] & 0xFF);
570 if (diff != 0) {
571 return diff;
572 }
573 }
574
575
576 return termLen - other.length;
577 }
578
    /**
     * Updates the running shared-prefix counts after term {@code termOrd} was appended,
     * recording a skip pointer (via {@link #saveSkip}) whenever a run of at least
     * {@code minSkipCount} consecutive terms sharing a prefix just ended.
     *
     * Invariant: {@code sameCounts[i]} counts how many consecutive terms, ending at
     * the previous term, share their first {@code i+1} bytes.
     */
    private void setSkips(int termOrd, byte[] termBytes) {

      final int termLength = termOffsets[termOrd+1] - termOffsets[termOrd];

      if (sameCounts.length < termLength) {
        sameCounts = ArrayUtil.grow(sameCounts, termLength);
      }

      if (termOrd > 0) {
        final int lastTermLength = termOffsets[termOrd] - termOffsets[termOrd-1];
        final int limit = Math.min(termLength, lastTermLength);

        int lastTermOffset = termOffsets[termOrd-1];
        int termOffset = termOffsets[termOrd];

        int i = 0;
        for(;i<limit;i++) {
          if (termBytes[lastTermOffset++] == termBytes[termOffset++]) {
            // Prefix of length i+1 still shared: extend its run.
            sameCounts[i]++;
          } else {
            // Bytes diverged: every prefix from here on just ended its run.
            // Flush runs long enough to warrant a skip, then restart at 1
            // (the current term begins a new run for each of these prefixes).
            for(;i<limit;i++) {
              if (sameCounts[i] >= minSkipCount) {

                saveSkip(termOrd, sameCounts[i]);
              }
              sameCounts[i] = 1;
            }
            break;
          }
        }

        // Prefixes longer than the current term also ended with the previous term.
        for(;i<lastTermLength;i++) {
          if (sameCounts[i] >= minSkipCount) {

            saveSkip(termOrd, sameCounts[i]);
          }
          sameCounts[i] = 0;
        }
        // Bytes beyond the previous term's length start fresh length-1 runs.
        for(int j=limit;j<termLength;j++) {
          sameCounts[j] = 1;
        }
      } else {
        // First term: every prefix length begins its first run.
        for(int i=0;i<termLength;i++) {
          sameCounts[i]++;
        }
      }
    }
627
    /**
     * Called once after the last term was loaded: flushes any still-open prefix
     * runs, then reverses each term's skip list so the largest (farthest) target
     * ordinal comes first.
     */
    private void finishSkips() {
      assert count == terms.length;
      int lastTermOffset = termOffsets[count-1];
      int lastTermLength = termOffsets[count] - lastTermOffset;

      for(int i=0;i<lastTermLength;i++) {
        if (sameCounts[i] >= minSkipCount) {
          // ord == count is the "past the end" target for runs that reach the final term.
          saveSkip(count, sameCounts[i]);
        }
      }

      // Skips were appended in ascending ord order; reverse in place so each
      // list is descending (biggest jump tried first).
      for(int termID=0;termID<terms.length;termID++) {
        TermAndSkip term = terms[termID];
        if (term.skips != null && term.skips.length > 1) {
          for(int pos=0;pos<term.skips.length/2;pos++) {
            final int otherPos = term.skips.length-pos-1;

            final int temp = term.skips[pos];
            term.skips[pos] = term.skips[otherPos];
            term.skips[otherPos] = temp;
          }
        }
      }
    }
654
655 private void saveSkip(int ord, int backCount) {
656 final TermAndSkip term = terms[ord - backCount];
657 skipCount++;
658 if (term.skips == null) {
659 term.skips = new int[] {ord};
660 } else {
661
662
663
664
665 final int[] newSkips = new int[term.skips.length+1];
666 System.arraycopy(term.skips, 0, newSkips, 0, term.skips.length);
667 term.skips = newSkips;
668 term.skips[term.skips.length-1] = ord;
669 }
670 }
671
    @Override
    public TermsEnum iterator() {
      // A fresh enum per call; all enums share the immutable loaded arrays.
      return new DirectTermsEnum();
    }

    @Override
    public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) {
      // Uses the prefix skip pointers to leap over non-matching term ranges.
      return new DirectIntersectTermsEnum(compiled, startTerm);
    }
681
    @Override
    public long size() {
      // Number of unique terms in this field.
      return terms.length;
    }

    @Override
    public long getSumTotalTermFreq() {
      return sumTotalTermFreq;
    }

    @Override
    public long getSumDocFreq() {
      return sumDocFreq;
    }

    @Override
    public int getDocCount() {
      return docCount;
    }

    // The has* flags below were derived from the FieldInfo's IndexOptions at load time.

    @Override
    public boolean hasFreqs() {
      return hasFreq;
    }

    @Override
    public boolean hasOffsets() {
      return hasOffsets;
    }

    @Override
    public boolean hasPositions() {
      return hasPos;
    }

    @Override
    public boolean hasPayloads() {
      return hasPayloads;
    }
721
    /** Sequential/binary-search TermsEnum over the in-RAM term dictionary. */
    private final class DirectTermsEnum extends TermsEnum {

      // Reusable holder pointing directly into the shared termBytes buffer.
      private final BytesRef scratch = new BytesRef();
      // Ordinal of the current term; -1 before the first next().
      private int termOrd;

      private DirectTermsEnum() {
        termOrd = -1;
      }

      // Points scratch at the current term's slice of termBytes.
      private BytesRef setTerm() {
        scratch.bytes = termBytes;
        scratch.offset = termOffsets[termOrd];
        scratch.length = termOffsets[termOrd+1] - termOffsets[termOrd];
        return scratch;
      }

      @Override
      public BytesRef next() {
        termOrd++;
        if (termOrd < terms.length) {
          return setTerm();
        } else {
          return null;
        }
      }

      @Override
      public TermState termState() {
        // The ordinal alone fully identifies a term in this dictionary.
        OrdTermState state = new OrdTermState();
        state.ord = termOrd;
        return state;
      }

      // Binary search over the sorted terms. Returns the matching ordinal, or
      // -(insertionPoint+1) when absent (same contract as Arrays.binarySearch).
      private int findTerm(BytesRef term) {

        int low = 0;
        int high = terms.length-1;

        while (low <= high) {
          int mid = (low + high) >>> 1;
          int cmp = compare(mid, term);
          if (cmp < 0) {
            low = mid + 1;
          } else if (cmp > 0) {
            high = mid - 1;
          } else {
            return mid;
          }
        }

        return -(low + 1);
      }

      @Override
      public SeekStatus seekCeil(BytesRef term) {

        final int ord = findTerm(term);

        if (ord >= 0) {
          termOrd = ord;
          setTerm();
          return SeekStatus.FOUND;
        } else if (ord == -terms.length-1) {
          // Insertion point lies past the last term.
          return SeekStatus.END;
        } else {
          // Position on the smallest term greater than the target.
          termOrd = -ord - 1;
          setTerm();
          return SeekStatus.NOT_FOUND;
        }
      }

      @Override
      public boolean seekExact(BytesRef term) {

        final int ord = findTerm(term);
        if (ord >= 0) {
          termOrd = ord;
          setTerm();
          return true;
        } else {
          return false;
        }
      }

      @Override
      public void seekExact(long ord) {
        termOrd = (int) ord;
        setTerm();
      }

      @Override
      public void seekExact(BytesRef term, TermState state) throws IOException {
        // State came from termState() above, so it is an OrdTermState.
        termOrd = (int) ((OrdTermState) state).ord;
        setTerm();
        assert term.equals(scratch);
      }

      @Override
      public BytesRef term() {
        return scratch;
      }

      @Override
      public long ord() {
        return termOrd;
      }

      @Override
      public int docFreq() {
        if (terms[termOrd] instanceof LowFreqTerm) {
          return ((LowFreqTerm) terms[termOrd]).docFreq;
        } else {
          return ((HighFreqTerm) terms[termOrd]).docIDs.length;
        }
      }

      @Override
      public long totalTermFreq() {
        if (terms[termOrd] instanceof LowFreqTerm) {
          return ((LowFreqTerm) terms[termOrd]).totalTermFreq;
        } else {
          return ((HighFreqTerm) terms[termOrd]).totalTermFreq;
        }
      }

      @Override
      public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {

        // Legacy null semantics: such callers expect null when positions were not indexed.
        if (PostingsEnum.featureRequested(flags, DocsAndPositionsEnum.OLD_NULL_SEMANTICS)) {
          if (!hasPos) {

            return null;
          }
        }

        // Dispatch on (positions requested?) x (low/high freq) x (field features),
        // reusing the caller's enum when its concrete type matches.
        if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {

          if (terms[termOrd] instanceof LowFreqTerm) {
            final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
            final int[] postings = term.postings;
            if (hasFreq == false) {
              LowFreqDocsEnumNoTF docsEnum;
              if (reuse instanceof LowFreqDocsEnumNoTF) {
                docsEnum = (LowFreqDocsEnumNoTF) reuse;
              } else {
                docsEnum = new LowFreqDocsEnumNoTF();
              }

              return docsEnum.reset(postings);

            } else if (hasPos == false) {
              LowFreqDocsEnumNoPos docsEnum;
              if (reuse instanceof LowFreqDocsEnumNoPos) {
                docsEnum = (LowFreqDocsEnumNoPos) reuse;
              } else {
                docsEnum = new LowFreqDocsEnumNoPos();
              }

              return docsEnum.reset(postings);
            }
            final byte[] payloads = term.payloads;
            return new LowFreqPostingsEnum(hasOffsets, hasPayloads).reset(postings, payloads);
          } else {
            final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
            if (hasPos == false) {
              return new HighFreqDocsEnum().reset(term.docIDs, term.freqs);
            } else {
              return new HighFreqPostingsEnum(hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
            }
          }
        }

        // Docs (and possibly freqs) only.
        if (terms[termOrd] instanceof LowFreqTerm) {
          final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
          if (hasFreq) {
            if (hasPos) {
              // posLen = ints consumed per position in the flattened postings array.
              int posLen;
              if (hasOffsets) {
                posLen = 3;
              } else {
                posLen = 1;
              }
              if (hasPayloads) {
                posLen++;
              }
              LowFreqDocsEnum docsEnum;
              if (reuse instanceof LowFreqDocsEnum) {
                docsEnum = (LowFreqDocsEnum) reuse;
                if (!docsEnum.canReuse(posLen)) {
                  docsEnum = new LowFreqDocsEnum(posLen);
                }
              } else {
                docsEnum = new LowFreqDocsEnum(posLen);
              }

              return docsEnum.reset(postings);
            } else {
              LowFreqDocsEnumNoPos docsEnum;
              if (reuse instanceof LowFreqDocsEnumNoPos) {
                docsEnum = (LowFreqDocsEnumNoPos) reuse;
              } else {
                docsEnum = new LowFreqDocsEnumNoPos();
              }

              return docsEnum.reset(postings);
            }
          } else {
            LowFreqDocsEnumNoTF docsEnum;
            if (reuse instanceof LowFreqDocsEnumNoTF) {
              docsEnum = (LowFreqDocsEnumNoTF) reuse;
            } else {
              docsEnum = new LowFreqDocsEnumNoTF();
            }

            return docsEnum.reset(postings);
          }
        } else {
          final HighFreqTerm term = (HighFreqTerm) terms[termOrd];

          HighFreqDocsEnum docsEnum;
          if (reuse instanceof HighFreqDocsEnum) {
            docsEnum = (HighFreqDocsEnum) reuse;
          } else {
            docsEnum = new HighFreqDocsEnum();
          }

          return docsEnum.reset(term.docIDs, term.freqs);
        }
      }

    }
970
971 private final class DirectIntersectTermsEnum extends TermsEnum {
972 private final RunAutomaton runAutomaton;
973 private final CompiledAutomaton compiledAutomaton;
974 private int termOrd;
975 private final BytesRef scratch = new BytesRef();
976
      /**
       * One frame of the automaton walk: the automaton state for a prefix depth,
       * the term ordinal at which this frame stops applying, and an iterator
       * over the state's outgoing byte transitions.
       */
      private final class State {
        // First term ordinal no longer covered by this frame's prefix.
        int changeOrd;
        // Automaton state reached after consuming this frame's prefix.
        int state;
        // Index of the current transition; -1 before the first is fetched.
        int transitionUpto;
        int transitionCount;
        // Inclusive label range (0..255) of the current transition.
        int transitionMax;
        int transitionMin;
        final Transition transition = new Transition();
      }
986
987 private State[] states;
988 private int stateUpto;
989
      /**
       * Sets up the root automaton frame, then (when {@code startTerm} is given)
       * advances termOrd just before the first term at or after startTerm,
       * descending the automaton and consuming skip pointers along the way so
       * the frame stack matches the chosen position.
       */
      public DirectIntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) {
        runAutomaton = compiled.runAutomaton;
        compiledAutomaton = compiled;
        termOrd = -1;
        states = new State[1];
        states[0] = new State();
        states[0].changeOrd = terms.length;
        states[0].state = runAutomaton.getInitialState();
        states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
        compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
        // transitionUpto/-Max = -1 means "no transition fetched yet".
        states[0].transitionUpto = -1;
        states[0].transitionMax = -1;

        if (startTerm != null) {
          int skipUpto = 0;
          if (startTerm.length == 0) {
            // Empty start term matches only the empty term at ordinal 0, if present.
            if (terms.length > 0 && termOffsets[1] == 0) {
              termOrd = 0;
            }
          } else {
            termOrd++;

            // Walk startTerm byte by byte, keeping the frame stack in sync.
            nextLabel:
            for(int i=0;i<startTerm.length;i++) {
              final int label = startTerm.bytes[startTerm.offset+i] & 0xFF;

              // Advance frame i's transition iterator until it can cover this label.
              while (label > states[i].transitionMax) {
                states[i].transitionUpto++;
                assert states[i].transitionUpto < states[i].transitionCount;
                compiledAutomaton.automaton.getNextTransition(states[i].transition);
                states[i].transitionMin = states[i].transition.min;
                states[i].transitionMax = states[i].transition.max;
                assert states[i].transitionMin >= 0;
                assert states[i].transitionMin <= 255;
                assert states[i].transitionMax >= 0;
                assert states[i].transitionMax <= 255;
              }

              // Scan terms at depth i for the first whose byte i matches the label.
              while (termOrd < terms.length) {
                final int skipOffset = skipOffsets[termOrd];
                final int numSkips = skipOffsets[termOrd+1] - skipOffset;
                final int termOffset = termOffsets[termOrd];
                final int termLength = termOffsets[1+termOrd] - termOffset;

                if (termOrd == states[stateUpto].changeOrd) {
                  // Walked past this frame's prefix range: pop and stand just before it.
                  stateUpto--;
                  termOrd--;
                  return;
                }

                if (termLength == i) {
                  // Term is a strict prefix of startTerm: move past it.
                  termOrd++;
                  skipUpto = 0;

                } else if (label < (termBytes[termOffset+i] & 0xFF)) {
                  // Current term is already past startTerm at this byte: stop just before it.
                  termOrd--;

                  stateUpto -= skipUpto;
                  assert stateUpto >= 0;
                  return;
                } else if (label == (termBytes[termOffset+i] & 0xFF)) {
                  // Byte matches: descend one frame if a skip pointer exists.
                  if (skipUpto < numSkips) {
                    grow();

                    final int nextState = runAutomaton.step(states[stateUpto].state, label);

                    assert nextState != -1;

                    stateUpto++;
                    states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
                    states[stateUpto].state = nextState;
                    states[stateUpto].transitionCount = compiledAutomaton.automaton.getNumTransitions(nextState);
                    compiledAutomaton.automaton.initTransition(states[stateUpto].state, states[stateUpto].transition);
                    states[stateUpto].transitionUpto = -1;
                    states[stateUpto].transitionMax = -1;

                    continue nextLabel;
                  } else {
                    // No skip data: linear-scan the (short) run of matching terms.
                    final int startTermOrd = termOrd;
                    while (termOrd < terms.length && compare(termOrd, startTerm) <= 0) {
                      assert termOrd == startTermOrd || skipOffsets[termOrd] == skipOffsets[termOrd+1];
                      termOrd++;
                    }
                    // Runs without skips are shorter than minSkipCount by construction.
                    assert termOrd - startTermOrd < minSkipCount;
                    termOrd--;
                    stateUpto -= skipUpto;

                    return;
                  }
                } else {
                  // Term byte is below the label: jump over the shared-prefix run if possible.
                  if (skipUpto < numSkips) {
                    termOrd = skips[skipOffset + skipUpto];

                  } else {

                    termOrd++;
                  }
                  skipUpto = 0;
                }
              }

              // Ran off the end of the dictionary.
              termOrd--;

              return;
            }
          }

          final int termOffset = termOffsets[termOrd];
          final int termLen = termOffsets[1+termOrd] - termOffset;

          // If we landed past startTerm (no exact match), back up so next() returns it.
          if (termOrd >= 0 && !startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
            stateUpto -= skipUpto;
            termOrd--;
          }

        }
      }
1150
1151 private void grow() {
1152 if (states.length == 1+stateUpto) {
1153 final State[] newStates = new State[states.length+1];
1154 System.arraycopy(states, 0, newStates, 0, states.length);
1155 newStates[states.length] = new State();
1156 states = newStates;
1157 }
1158 }
1159
      /**
       * Advances to the next term accepted by the automaton, using the frame
       * stack plus the prefix skip pointers to leap over rejected term ranges.
       * Returns null when the dictionary is exhausted.
       */
      @Override
      public BytesRef next() {

        termOrd++;
        int skipUpto = 0;

        // Special-case the empty term at ordinal 0 (zero-length prefix).
        if (termOrd == 0 && termOffsets[1] == 0) {

          assert stateUpto == 0;

          if (runAutomaton.isAccept(states[0].state)) {
            scratch.bytes = termBytes;
            scratch.offset = 0;
            scratch.length = 0;
            return scratch;
          }
          termOrd++;
        }

        nextTerm:

        while (true) {

          if (termOrd == terms.length) {
            // Dictionary exhausted.
            return null;
          }

          final State state = states[stateUpto];
          if (termOrd == state.changeOrd) {
            // Left this frame's prefix range: pop and re-evaluate at the shorter depth.
            stateUpto--;

            continue;
          }

          final int termOffset = termOffsets[termOrd];
          final int termLength = termOffsets[termOrd+1] - termOffset;
          final int skipOffset = skipOffsets[termOrd];
          final int numSkips = skipOffsets[termOrd+1] - skipOffset;

          assert termOrd < state.changeOrd;

          assert stateUpto <= termLength: "term.length=" + termLength + "; stateUpto=" + stateUpto;
          // Next byte of the current term at the current prefix depth.
          final int label = termBytes[termOffset+stateUpto] & 0xFF;

          // Advance this frame's transition iterator until it can reach the label.
          while (label > state.transitionMax) {

            state.transitionUpto++;
            if (state.transitionUpto == state.transitionCount) {
              // No transition covers this or any larger label at this depth.
              if (stateUpto == 0) {
                termOrd = terms.length;
                return null;
              } else {
                assert state.changeOrd > termOrd;
                // Jump to the end of this frame's range and pop.
                termOrd = states[stateUpto].changeOrd;
                skipUpto = 0;
                stateUpto--;
              }
              continue nextTerm;
            }
            compiledAutomaton.automaton.getNextTransition(state.transition);
            assert state.transitionUpto < state.transitionCount: " state.transitionUpto=" + state.transitionUpto + " vs " + state.transitionCount;
            state.transitionMin = state.transition.min;
            state.transitionMax = state.transition.max;
            assert state.transitionMin >= 0;
            assert state.transitionMin <= 255;
            assert state.transitionMax >= 0;
            assert state.transitionMax <= 255;
          }

          final int targetLabel = state.transitionMin;

          if ((termBytes[termOffset+stateUpto] & 0xFF) < targetLabel) {
            // Current term's byte is below the next acceptable label: binary-search
            // forward within this frame's range for the first term reaching targetLabel.
            int low = termOrd+1;
            int high = state.changeOrd-1;
            while (true) {
              if (low > high) {
                // No term in range has byte >= targetLabel at this depth... position
                // at 'low' (== changeOrd) so the frame pops on the next pass.
                termOrd = low;

                skipUpto = 0;
                continue nextTerm;
              }
              int mid = (low + high) >>> 1;
              int cmp = (termBytes[termOffsets[mid] + stateUpto] & 0xFF) - targetLabel;

              if (cmp < 0) {
                low = mid+1;
              } else if (cmp > 0) {
                high = mid - 1;
              } else {
                // Found targetLabel; roll back to the first term carrying it.
                while (mid > termOrd && (termBytes[termOffsets[mid-1] + stateUpto] & 0xFF) == targetLabel) {
                  mid--;
                }
                termOrd = mid;

                skipUpto = 0;
                continue nextTerm;
              }
            }
          }

          int nextState = runAutomaton.step(states[stateUpto].state, label);

          if (nextState == -1) {
            // Automaton rejects this byte: skip the whole shared-prefix run if we can.
            if (skipUpto < numSkips) {

              termOrd = skips[skipOffset+skipUpto];
            } else {
              termOrd++;
            }
            skipUpto = 0;
          } else if (skipUpto < numSkips) {
            // Accepted and a skip pointer exists: push a deeper frame for this prefix.
            grow();
            stateUpto++;
            states[stateUpto].state = nextState;
            states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
            states[stateUpto].transitionCount = compiledAutomaton.automaton.getNumTransitions(nextState);
            compiledAutomaton.automaton.initTransition(nextState, states[stateUpto].transition);
            states[stateUpto].transitionUpto = -1;
            states[stateUpto].transitionMax = -1;

            if (stateUpto == termLength) {
              // Consumed the whole term while descending.
              if (runAutomaton.isAccept(nextState)) {

                scratch.bytes = termBytes;
                scratch.offset = termOffsets[termOrd];
                scratch.length = termOffsets[1+termOrd] - scratch.offset;

                return scratch;
              } else {

                termOrd++;
                skipUpto = 0;
              }
            }
          } else {
            // Accepted but no skip data: run the automaton over the term's tail directly.

            // Cheap pre-filter: reject terms not ending in the automaton's common suffix.
            if (compiledAutomaton.commonSuffixRef != null) {

              assert compiledAutomaton.commonSuffixRef.offset == 0;
              if (termLength < compiledAutomaton.commonSuffixRef.length) {
                termOrd++;
                skipUpto = 0;
                continue nextTerm;
              }
              int offset = termOffset + termLength - compiledAutomaton.commonSuffixRef.length;
              for(int suffix=0;suffix<compiledAutomaton.commonSuffixRef.length;suffix++) {
                if (termBytes[offset + suffix] != compiledAutomaton.commonSuffixRef.bytes[suffix]) {
                  termOrd++;
                  skipUpto = 0;
                  continue nextTerm;
                }
              }
            }

            int upto = stateUpto+1;
            while (upto < termLength) {
              nextState = runAutomaton.step(nextState, termBytes[termOffset+upto] & 0xFF);
              if (nextState == -1) {
                // Rejected mid-term: try the next term.
                termOrd++;
                skipUpto = 0;

                continue nextTerm;
              }
              upto++;
            }

            if (runAutomaton.isAccept(nextState)) {
              scratch.bytes = termBytes;
              scratch.offset = termOffsets[termOrd];
              scratch.length = termOffsets[1+termOrd] - scratch.offset;

              return scratch;
            } else {
              termOrd++;
              skipUpto = 0;

            }
          }
        }
      }
1439
1440 @Override
1441 public TermState termState() {
1442 OrdTermState state = new OrdTermState();
1443 state.ord = termOrd;
1444 return state;
1445 }
1446
1447 @Override
1448 public BytesRef term() {
1449 return scratch;
1450 }
1451
1452 @Override
1453 public long ord() {
1454 return termOrd;
1455 }
1456
1457 @Override
1458 public int docFreq() {
1459 if (terms[termOrd] instanceof LowFreqTerm) {
1460 return ((LowFreqTerm) terms[termOrd]).docFreq;
1461 } else {
1462 return ((HighFreqTerm) terms[termOrd]).docIDs.length;
1463 }
1464 }
1465
1466 @Override
1467 public long totalTermFreq() {
1468 if (terms[termOrd] instanceof LowFreqTerm) {
1469 return ((LowFreqTerm) terms[termOrd]).totalTermFreq;
1470 } else {
1471 return ((HighFreqTerm) terms[termOrd]).totalTermFreq;
1472 }
1473 }
1474
      @Override
      public PostingsEnum postings(PostingsEnum reuse, int flags) {
        // Selects an enum implementation based on what was indexed for this
        // field and what the caller requested via flags.
        // NOTE(review): the reuse argument is never inspected — a fresh enum
        // is allocated on every call, even though LowFreqDocsEnum exposes a
        // canReuse(posMult) hook; confirm whether reuse was intended here.

        // Legacy docs-and-positions semantics: such callers expect null when
        // the field has no positions indexed.
        if (PostingsEnum.featureRequested(flags, DocsAndPositionsEnum.OLD_NULL_SEMANTICS)) {
          if (!hasPos) {
            // Positions were not indexed for this field.
            return null;
          }
        }

        // Positions requested and available: return a full postings enum
        // (positions plus offsets/payloads when those were indexed).
        if (hasPos && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
          if (terms[termOrd] instanceof LowFreqTerm) {
            final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
            final int[] postings = term.postings;
            final byte[] payloads = term.payloads;
            return new LowFreqPostingsEnum(hasOffsets, hasPayloads).reset(postings, payloads);
          } else {
            final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
            return new HighFreqPostingsEnum(hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
          }
        }

        // Docs-only (or docs+freqs) path.
        if (terms[termOrd] instanceof LowFreqTerm) {
          final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
          if (hasFreq) {
            if (hasPos) {
              // Positions are interleaved in the postings array even though
              // they were not requested; compute how many ints each encoded
              // position occupies so the enum can step over them.
              int posLen;
              if (hasOffsets) {
                posLen = 3;
              } else {
                posLen = 1;
              }
              if (hasPayloads) {
                posLen++;
              }
              return new LowFreqDocsEnum(posLen).reset(postings);
            } else {
              return new LowFreqDocsEnumNoPos().reset(postings);
            }
          } else {
            return new LowFreqDocsEnumNoTF().reset(postings);
          }
        } else {
          final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
          // term.freqs may be null when freqs were not indexed; the enum
          // then reports freq=1 for every doc.
          return new HighFreqDocsEnum().reset(term.docIDs, term.freqs);
        }
      }
1527
      @Override
      public SeekStatus seekCeil(BytesRef term) {
        // This enum is advanced only by its own iteration (see next());
        // arbitrary term seeking is not supported.
        throw new UnsupportedOperationException();
      }
1532
      @Override
      public void seekExact(long ord) {
        // Seeking by ordinal is likewise unsupported for this enum.
        throw new UnsupportedOperationException();
      }
1537 }
1538 }
1539
1540
1541 private final static class LowFreqDocsEnumNoTF extends PostingsEnum {
1542 private int[] postings;
1543 private int upto;
1544
1545 public PostingsEnum reset(int[] postings) {
1546 this.postings = postings;
1547 upto = -1;
1548 return this;
1549 }
1550
1551
1552
1553 @Override
1554 public int nextDoc() {
1555 upto++;
1556 if (upto < postings.length) {
1557 return postings[upto];
1558 }
1559 return NO_MORE_DOCS;
1560 }
1561
1562 @Override
1563 public int docID() {
1564 if (upto < 0) {
1565 return -1;
1566 } else if (upto < postings.length) {
1567 return postings[upto];
1568 } else {
1569 return NO_MORE_DOCS;
1570 }
1571 }
1572
1573 @Override
1574 public int freq() {
1575 return 1;
1576 }
1577
1578 @Override
1579 public int nextPosition() throws IOException {
1580 return -1;
1581 }
1582
1583 @Override
1584 public int startOffset() throws IOException {
1585 return -1;
1586 }
1587
1588 @Override
1589 public int endOffset() throws IOException {
1590 return -1;
1591 }
1592
1593 @Override
1594 public BytesRef getPayload() throws IOException {
1595 return null;
1596 }
1597
1598 @Override
1599 public int advance(int target) throws IOException {
1600
1601
1602 return slowAdvance(target);
1603 }
1604
1605 @Override
1606 public long cost() {
1607 return postings.length;
1608 }
1609 }
1610
1611
1612 private final static class LowFreqDocsEnumNoPos extends PostingsEnum {
1613 private int[] postings;
1614 private int upto;
1615
1616 public LowFreqDocsEnumNoPos() {}
1617
1618 public PostingsEnum reset(int[] postings) {
1619 this.postings = postings;
1620 upto = -2;
1621 return this;
1622 }
1623
1624
1625 @Override
1626 public int nextDoc() {
1627 upto += 2;
1628 if (upto < postings.length) {
1629 return postings[upto];
1630 }
1631 return NO_MORE_DOCS;
1632 }
1633
1634 @Override
1635 public int docID() {
1636 if (upto < 0) {
1637 return -1;
1638 } else if (upto < postings.length) {
1639 return postings[upto];
1640 } else {
1641 return NO_MORE_DOCS;
1642 }
1643 }
1644
1645 @Override
1646 public int freq() {
1647 return postings[upto+1];
1648 }
1649
1650 @Override
1651 public int nextPosition() throws IOException {
1652 return -1;
1653 }
1654
1655 @Override
1656 public int startOffset() throws IOException {
1657 return -1;
1658 }
1659
1660 @Override
1661 public int endOffset() throws IOException {
1662 return -1;
1663 }
1664
1665 @Override
1666 public BytesRef getPayload() throws IOException {
1667 return null;
1668 }
1669
1670 @Override
1671 public int advance(int target) throws IOException {
1672
1673
1674 return slowAdvance(target);
1675 }
1676
1677 @Override
1678 public long cost() {
1679 return postings.length / 2;
1680 }
1681 }
1682
1683
1684 private final static class LowFreqDocsEnum extends PostingsEnum {
1685 private int[] postings;
1686 private final int posMult;
1687 private int upto;
1688 private int freq;
1689
1690 public LowFreqDocsEnum(int posMult) {
1691 this.posMult = posMult;
1692
1693
1694
1695 }
1696
1697 public boolean canReuse(int posMult) {
1698 return this.posMult == posMult;
1699 }
1700
1701 public PostingsEnum reset(int[] postings) {
1702 this.postings = postings;
1703 upto = -2;
1704 freq = 0;
1705 return this;
1706 }
1707
1708
1709 @Override
1710 public int nextDoc() {
1711 upto += 2 + freq*posMult;
1712
1713
1714
1715 if (upto < postings.length) {
1716 freq = postings[upto+1];
1717 assert freq > 0;
1718 return postings[upto];
1719 }
1720 return NO_MORE_DOCS;
1721 }
1722
1723 @Override
1724 public int docID() {
1725
1726 if (upto < 0) {
1727 return -1;
1728 } else if (upto < postings.length) {
1729 return postings[upto];
1730 } else {
1731 return NO_MORE_DOCS;
1732 }
1733 }
1734
1735 @Override
1736 public int freq() {
1737
1738 return freq;
1739 }
1740
1741 @Override
1742 public int nextPosition() throws IOException {
1743 return -1;
1744 }
1745
1746 @Override
1747 public int startOffset() throws IOException {
1748 return -1;
1749 }
1750
1751 @Override
1752 public int endOffset() throws IOException {
1753 return -1;
1754 }
1755
1756 @Override
1757 public BytesRef getPayload() throws IOException {
1758 return null;
1759 }
1760
1761 @Override
1762 public int advance(int target) throws IOException {
1763
1764
1765 return slowAdvance(target);
1766 }
1767
1768 @Override
1769 public long cost() {
1770
1771 return postings.length / 2;
1772 }
1773 }
1774
1775 private final static class LowFreqPostingsEnum extends PostingsEnum {
1776 private int[] postings;
1777 private final int posMult;
1778 private final boolean hasOffsets;
1779 private final boolean hasPayloads;
1780 private final BytesRef payload = new BytesRef();
1781 private int upto;
1782 private int docID;
1783 private int freq;
1784 private int skipPositions;
1785 private int pos;
1786 private int startOffset;
1787 private int endOffset;
1788 private int lastPayloadOffset;
1789 private int payloadOffset;
1790 private int payloadLength;
1791 private byte[] payloadBytes;
1792
1793 public LowFreqPostingsEnum(boolean hasOffsets, boolean hasPayloads) {
1794 this.hasOffsets = hasOffsets;
1795 this.hasPayloads = hasPayloads;
1796 if (hasOffsets) {
1797 if (hasPayloads) {
1798 posMult = 4;
1799 } else {
1800 posMult = 3;
1801 }
1802 } else if (hasPayloads) {
1803 posMult = 2;
1804 } else {
1805 posMult = 1;
1806 }
1807 }
1808
1809 public PostingsEnum reset(int[] postings, byte[] payloadBytes) {
1810 this.postings = postings;
1811 upto = 0;
1812 skipPositions = 0;
1813 pos = -1;
1814 startOffset = -1;
1815 endOffset = -1;
1816 docID = -1;
1817 payloadLength = 0;
1818 this.payloadBytes = payloadBytes;
1819 return this;
1820 }
1821
1822 @Override
1823 public int nextDoc() {
1824 pos = -1;
1825 if (hasPayloads) {
1826 for(int i=0;i<skipPositions;i++) {
1827 upto++;
1828 if (hasOffsets) {
1829 upto += 2;
1830 }
1831 payloadOffset += postings[upto++];
1832 }
1833 } else {
1834 upto += posMult * skipPositions;
1835 }
1836
1837 if (upto < postings.length) {
1838 docID = postings[upto++];
1839 freq = postings[upto++];
1840 skipPositions = freq;
1841 return docID;
1842 }
1843
1844 return docID = NO_MORE_DOCS;
1845 }
1846
1847 @Override
1848 public int docID() {
1849 return docID;
1850 }
1851
1852 @Override
1853 public int freq() {
1854 return freq;
1855 }
1856
1857 @Override
1858 public int nextPosition() {
1859 assert skipPositions > 0;
1860 skipPositions--;
1861 pos = postings[upto++];
1862 if (hasOffsets) {
1863 startOffset = postings[upto++];
1864 endOffset = postings[upto++];
1865 }
1866 if (hasPayloads) {
1867 payloadLength = postings[upto++];
1868 lastPayloadOffset = payloadOffset;
1869 payloadOffset += payloadLength;
1870 }
1871 return pos;
1872 }
1873
1874 @Override
1875 public int startOffset() {
1876 return startOffset;
1877 }
1878
1879 @Override
1880 public int endOffset() {
1881 return endOffset;
1882 }
1883
1884 @Override
1885 public int advance(int target) throws IOException {
1886 return slowAdvance(target);
1887 }
1888
1889 @Override
1890 public BytesRef getPayload() {
1891 if (payloadLength > 0) {
1892 payload.bytes = payloadBytes;
1893 payload.offset = lastPayloadOffset;
1894 payload.length = payloadLength;
1895 return payload;
1896 } else {
1897 return null;
1898 }
1899 }
1900
1901 @Override
1902 public long cost() {
1903
1904 return postings.length / 2;
1905 }
1906 }
1907
1908
1909 private final static class HighFreqDocsEnum extends PostingsEnum {
1910 private int[] docIDs;
1911 private int[] freqs;
1912 private int upto;
1913 private int docID = -1;
1914
1915 public HighFreqDocsEnum() {}
1916
1917 public int[] getDocIDs() {
1918 return docIDs;
1919 }
1920
1921 public int[] getFreqs() {
1922 return freqs;
1923 }
1924
1925 public PostingsEnum reset(int[] docIDs, int[] freqs) {
1926 this.docIDs = docIDs;
1927 this.freqs = freqs;
1928 docID = upto = -1;
1929 return this;
1930 }
1931
1932 @Override
1933 public int nextDoc() {
1934 upto++;
1935 try {
1936 return docID = docIDs[upto];
1937 } catch (ArrayIndexOutOfBoundsException e) {
1938 }
1939 return docID = NO_MORE_DOCS;
1940 }
1941
1942 @Override
1943 public int docID() {
1944 return docID;
1945 }
1946
1947 @Override
1948 public int freq() {
1949 if (freqs == null) {
1950 return 1;
1951 } else {
1952 return freqs[upto];
1953 }
1954 }
1955
1956 @Override
1957 public int advance(int target) {
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988 upto++;
1989 if (upto == docIDs.length) {
1990 return docID = NO_MORE_DOCS;
1991 }
1992
1993
1994
1995 int inc = 10;
1996 int nextUpto = upto+10;
1997 int low;
1998 int high;
1999 while (true) {
2000
2001 if (nextUpto >= docIDs.length) {
2002 low = nextUpto-inc;
2003 high = docIDs.length-1;
2004 break;
2005 }
2006
2007
2008 if (target <= docIDs[nextUpto]) {
2009 low = nextUpto-inc;
2010 high = nextUpto;
2011 break;
2012 }
2013 inc *= 2;
2014 nextUpto += inc;
2015 }
2016
2017
2018
2019
2020 while (true) {
2021
2022 if (low > high) {
2023
2024
2025 upto = low;
2026 break;
2027 }
2028
2029 int mid = (low + high) >>> 1;
2030 int cmp = docIDs[mid] - target;
2031
2032
2033 if (cmp < 0) {
2034 low = mid + 1;
2035 } else if (cmp > 0) {
2036 high = mid - 1;
2037 } else {
2038
2039 upto = mid;
2040
2041 break;
2042 }
2043 }
2044
2045
2046
2047 if (upto == docIDs.length) {
2048
2049 return docID = NO_MORE_DOCS;
2050 } else {
2051
2052 return docID = docIDs[upto];
2053 }
2054 }
2055
2056 @Override
2057 public long cost() {
2058 return docIDs.length;
2059 }
2060
2061 @Override
2062 public int nextPosition() throws IOException {
2063 return -1;
2064 }
2065
2066 @Override
2067 public int startOffset() throws IOException {
2068 return -1;
2069 }
2070
2071 @Override
2072 public int endOffset() throws IOException {
2073 return -1;
2074 }
2075
2076 @Override
2077 public BytesRef getPayload() throws IOException {
2078 return null;
2079 }
2080 }
2081
2082
  // Full postings enum for high-frequency terms.  Parallel arrays hold, per
  // document: docID, freq, a flat int[] of positions (with start/end offsets
  // interleaved when indexed), and optional per-position payload bytes.
  private final static class HighFreqPostingsEnum extends PostingsEnum {
    private int[] docIDs;
    private int[] freqs;
    private int[][] positions;    // per-doc: pos [, startOffset, endOffset] repeated
    private byte[][][] payloads;  // [doc][position] -> payload bytes; may be null
    private final boolean hasOffsets;
    private final int posJump;    // ints per position entry in curPositions
    private int upto;             // index into the parallel doc arrays
    private int docID = -1;
    private int posUpto;          // cursor into curPositions
    private int[] curPositions;

    public HighFreqPostingsEnum(boolean hasOffsets) {
      this.hasOffsets = hasOffsets;
      posJump = hasOffsets ? 3 : 1;
    }

    public int[] getDocIDs() {
      return docIDs;
    }

    public int[][] getPositions() {
      return positions;
    }

    public int getPosJump() {
      return posJump;
    }

    public PostingsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
      this.docIDs = docIDs;
      this.freqs = freqs;
      this.positions = positions;
      this.payloads = payloads;
      upto = -1;
      return this;
    }

    @Override
    public int nextDoc() {
      upto++;
      if (upto < docIDs.length) {
        // Start one jump before the first position so the first
        // nextPosition() lands on index 0.
        posUpto = -posJump;
        curPositions = positions[upto];
        return docID = docIDs[upto];
      }

      return docID = NO_MORE_DOCS;
    }

    @Override
    public int freq() {
      return freqs[upto];
    }

    @Override
    public int docID() {
      return docID;
    }

    @Override
    public int nextPosition() {
      posUpto += posJump;
      assert posUpto < curPositions.length;
      return curPositions[posUpto];
    }

    @Override
    public int startOffset() {
      if (hasOffsets) {
        // Offset pair follows the position int.
        return curPositions[posUpto+1];
      } else {
        return -1;
      }
    }

    @Override
    public int endOffset() {
      if (hasOffsets) {
        return curPositions[posUpto+2];
      } else {
        return -1;
      }
    }

    @Override
    public int advance(int target) {
      // Two-phase skip, mirroring HighFreqDocsEnum.advance: a galloping
      // (exponential) probe to bracket the target, then a binary search
      // within the bracket.

      upto++;
      if (upto == docIDs.length) {
        return docID = NO_MORE_DOCS;
      }

      // Phase 1: gallop forward in doubling steps until we reach or pass
      // target, or run off the end of the array.
      int inc = 10;
      int nextUpto = upto+10;
      int low;
      int high;
      while (true) {
        if (nextUpto >= docIDs.length) {
          low = nextUpto-inc;
          high = docIDs.length-1;
          break;
        }
        if (target <= docIDs[nextUpto]) {
          low = nextUpto-inc;
          high = nextUpto;
          break;
        }
        inc *= 2;
        nextUpto += inc;
      }

      // Phase 2: binary search in [low, high] for the first doc >= target.
      while (true) {
        if (low > high) {
          // No exact match; low is the index of the first doc > target.
          upto = low;
          break;
        }

        int mid = (low + high) >>> 1;
        int cmp = docIDs[mid] - target;

        if (cmp < 0) {
          low = mid + 1;
        } else if (cmp > 0) {
          high = mid - 1;
        } else {
          // Exact match.
          upto = mid;
          break;
        }
      }

      if (upto == docIDs.length) {
        return docID = NO_MORE_DOCS;
      } else {
        // Re-arm the position cursor for the new document.
        posUpto = -posJump;
        curPositions = positions[upto];
        return docID = docIDs[upto];
      }
    }

    private final BytesRef payload = new BytesRef();

    @Override
    public BytesRef getPayload() {
      if (payloads == null) {
        return null;
      } else {
        // posUpto counts ints; dividing by the per-position width (same
        // value as posJump) recovers the position's ordinal.
        final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
        if (payloadBytes == null) {
          return null;
        }
        payload.bytes = payloadBytes;
        payload.length = payloadBytes.length;
        payload.offset = 0;
        return payload;
      }
    }

    @Override
    public long cost() {
      return docIDs.length;
    }
  }
2295 }